!! Copyright (C) 2009,2010,2011,2012  Marco Restelli
!!
!! This file is part of:
!!   FEMilaro -- Finite Element Method toolkit
!!
!! FEMilaro is free software; you can redistribute it and/or modify it
!! under the terms of the GNU General Public License as published by
!! the Free Software Foundation; either version 3 of the License, or
!! (at your option) any later version.
!!
!! FEMilaro is distributed in the hope that it will be useful, but
!! WITHOUT ANY WARRANTY; without even the implied warranty of
!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
!! General Public License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with FEMilaro; If not, see <http://www.gnu.org/licenses/>.
!!
!! author: Marco Restelli                   <marco.restelli@gmail.com>

!>\brief
!! Provides some tools for defining <em>sections</em> in the grid and
!! computing statistics on them.
!!
!! \details
!!
!! \section general General layout
!!
!! A section is specified as a collection of points, vertexes, faces,
!! elements or boundary regions. Sections are typically distributed on
!! an arbitrary number of processors, and used to compute reduction
!! operations such as summation, maximum or minimum. This module
!! should provide the following functionalities.
!! <ul>
!!  <li> Sections are read from input files and localized on the grid
!!  (taking into account the grid partitioning).
!!  <li> Sections are collected in <tt>type(t_section_collection)</tt>
!!  objects so that the corresponding reduction operations can be made
!!  together.
!!  <li> Each processor can update the section values efficiently. The
!!  typical code will look like this
!!  \code
!!   type(t_section_collection) :: sections
!!
!!   ! Define some side sections with 2 register and 4 data; the first
!!   ! register will be used for summations, the second for maximum.
!!
!!   do is=1,grid%ns
!!
!!     ! do some side computations ...
!!
!!     do i=1,sections%s2ss(is)%nvs
!!
!!       ! compute diagnostic_sum(1:4) and diagnostic_max(1:4) on is
!!
!!       sections%s2ss(is)%s(i)%p%regs(:,1) = &
!!         sections%s2ss(is)%s(i)%p%regs(:,1) + diagnostic_sum
!!       sections%s2ss(is)%s(i)%p%regs(:,2) = &
!!         max( sections%s2ss(is)%s(i)%p%regs(:,2) , diagnostic_max )
!!     enddo
!!   enddo
!!
!!   call sync_section_collection(sections,mpi_sum,1)
!!   call sync_section_collection(sections,mpi_sum,2)
!!   ! Get the reduced values in sections%sections%regs
!!  \endcode
!!  <li> The code should scale well with many sections and many
!!  processors: avoid global arrays, use a dedicated communicator for
!!  each section.
!!  <li> The code should be reasonably efficient: use nonblocking
!!  communications so that the reduction operations on the various
!!  sections can be performed in any order (without dead-locking!)
!! </ul>
!!
!! \section parallel MPI communications
!!
!! Typically each section intersects a limited number of subdomains,
!! and each subdomain can be intersected by an arbitrary number of
!! sections (possibly zero). To handle this efficiently, each section
!! receives a dedicated MPI communicator. Since each processor then
!! cares only about its own sections, and since there is no global
!! ordering among the various sections, care must be taken to avoid
!! locking. This can be done using nonblocking communications. The
!! details, anyway, are handled in this module - the user of this
!! module nevertheless must ensure that a reduction is called by
!! <em>all</em> the processors sharing a \c t_section_collection
!! object.
!<----------------------------------------------------------------------
module mod_sections

!-----------------------------------------------------------------------

 use mod_messages, only: &
   mod_messages_initialized, &
   error,   &
   warning, &
   info

 use mod_kinds, only: &
   mod_kinds_initialized, &
   wp

 use mod_fu_manager, only: &
   mod_fu_manager_initialized, &
   new_file_unit

 use mod_mpi_utils, only: &
   mod_mpi_utils_initialized, &
   mpi_integer, mpi_logical,     &
   wp_mpi, mpi_max, mpi_lor,     &
   mpi_status_size, mpi_sum,     &
   mpi_comm_size, mpi_comm_rank, &
   mpi_comm_world,               &
   mpi_comm_split,               &
   mpi_comm_free,                &
   mpi_undefined,                &
   mpi_request_null,             &
   mpi_recv,  mpi_send,          &
   mpi_irecv, mpi_isend,         &
   mpi_bcast,                    &
   mpi_wait, mpi_waitall,        &
   mpi_allreduce,                &
   mpi_alltoall, mpi_alltoallv!,  &
   !mpix_iallreduce

 use mod_octave_io, only: &
   mod_octave_io_initialized, &
   write_octave,   &
   read_octave,    &
   read_octave_al, &
   locate_var

 use mod_perms, only: &
   mod_perms_initialized, &
   t_dperm, dperm_reduce, &
   operator(.eq.)

 use mod_grid, only: &
   mod_grid_initialized, &
   locate_point, t_grid, &
   t_ddc_grid,           &
   affmap, iaffmap

!-----------------------------------------------------------------------
 
 implicit none

!-----------------------------------------------------------------------

! Module interface

 public :: &
   mod_sections_constructor, &
   mod_sections_destructor,  &
   mod_sections_initialized, &
   t_section_collection,   &
   t_point_section, t_side_section, &
   c_interp_data, &
   new_section_collection, validate_section_collection, &
   clear, sync_section_collection, write_octave

 private

!-----------------------------------------------------------------------

! Module types and parameters

 !----------------------------------------------------------------------
 ! Section description

 !> Abstract base type for the section-type-specific data; extended
 !! below for each supported section kind (points, vertexes, sides,
 !! elements, boundary sides).
 type, abstract :: c_section_details
  ! to be extended according to the supported section types
 end type c_section_details

 !> Point section: a cloud of points, each located inside one grid
 !! element of the local subdomain.
 type, extends(c_section_details) :: t_point_section
  integer :: np !< number of points
  real(wp), allocatable ::  x(:,:) !< point coords (physical space)
  real(wp), allocatable :: xi(:,:) !< point coords (reference space)
  integer,  allocatable :: p2e(:) !< point to element
 end type t_point_section

 !> Vertex section: a subset of the grid vertexes.
 type, extends(c_section_details) :: t_vert_section
  integer :: nv !< number of vertices
  integer,  allocatable :: v2v(:) !< section vert. to grid vert.
 end type t_vert_section

 !> Side section: a subset of the grid sides, each with a parity
 !! describing its orientation relative to the grid side.
 type, extends(c_section_details) :: t_side_section
  integer :: ns !< number of sides
  integer,  allocatable :: s2s(:) !< section side to grid side
  integer,  allocatable :: p(:) !< parity (i.e. orientation)
 end type t_side_section

 !> Element section: a subset of the grid elements.
 type, extends(c_section_details) :: t_elem_section
  integer :: ne !< number of elements
  integer,  allocatable :: e2e(:) !< section elem. to grid elem.
 end type t_elem_section

 !> Boundary side section: can be seen as a side section where all the
 !! sides are boundary sides.
 !!
 !! \todo Consider adding additional fields pointing to the elements
 !! of a \c t_bcs object
 type, extends(c_section_details) :: t_bside_section
  integer :: nbr !< number of boundary regions
  integer,  allocatable :: ibr(:) !< boundary region identifiers
  type(t_side_section) :: side_section !< corresponding side section
 end type t_bside_section

 !> Generic section.
 !!
 !! This type describes a generic section, regardless of whether it is
 !! composed of points, vertexes, sides, elements or side faces. The
 !! specific details for each type of section are stored in the
 !! polymorphic component \c section_details.
 type :: t_section
  integer :: ndata !< number of data for each register
  integer :: nregs !< number of registers
  !> data registers (ndata,nregs); also used as receive buffer in the
  !! MPI reductions (see \c sbuf)
  real(wp), allocatable :: regs(:,:)
  character(len=100) :: section_name !< section identifier
  !> Section volume: this is the volume of the entire section (i.e.
  !! not only of the portion belonging to this processor) and is used
  !! to compute average values. Depending on the section type, this
  !! can be the number of points in the section, or a surface area, or
  !! a volume.
  real(wp) :: svol
  class(c_section_details), allocatable :: section_details
  !> section MPI communicator (mpi_comm_world when the section was
  !! built without a \c t_ddc_grid)
  integer, private :: comm
  integer, allocatable, private :: req(:), mpi_stat(:,:) !< MPI data
  !> This field is used as send buffer, while the register is used as
  !! receive buffer
  real(wp), allocatable, private :: sbuf(:,:)
  !> Set once the section is validated
  logical :: validated = .false.
 end type t_section
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Section collections and access

 !> Pointer wrapper, needed to build arrays of pointers to sections.
 type t_sect_pointer
  type(t_section), pointer :: p => null()
 end type t_sect_pointer

 !> Abstract container for user-defined interpolation data (see
 !! \c t_e2ps).
 type, abstract :: c_interp_data
  ! to be extended by the user
 end type c_interp_data

 !> For each element, access the corresponding points and point
 !! sections.
 !!
 !! Notice that the points located in one element can belong to
 !! different sections, but each point belongs to exactly one section.
 !!
 !! An additional field \c interp_data is included which can be
 !! defined by the user to store information used for the
 !! interpolation, typically the values of some basis functions at the
 !! points \c x.
 type :: t_e2ps
  integer :: np      !< number of points (0 if none)
  real(wp), allocatable :: x (:,:) !< point coordinates (physical)
  real(wp), allocatable :: xi(:,:) !< point coordinates (reference)
  type(t_sect_pointer), allocatable :: s(:) !< owning section of each point
  class(c_interp_data),  allocatable :: interp_data
 end type t_e2ps

 !> A vertex can belong to an arbitrary number of vertex sections
 type :: t_v2vs
  integer :: ns !< number of sections (0 if none)
  type(t_sect_pointer), allocatable :: s(:) !< the ns sections
 end type t_v2vs

 !> A side can belong to an arbitrary number of side sections
 type :: t_s2ss
  integer :: ns !< number of sections (0 if none)
  integer, allocatable :: p(:) !< parity (i.e. relative orientation)
  type(t_sect_pointer), allocatable :: s(:) !< the ns sections
 end type t_s2ss

 !> An element can belong to an arbitrary number of element sections
 type :: t_e2es
  integer :: ns !< number of sections (0 if none)
  type(t_sect_pointer), allocatable :: s(:) !< the ns sections
 end type t_e2es

 !> Boundary side to boundary side sections (placeholder).
 type :: t_bs2bss
  ! Still not sure about the best layout
 end type t_bs2bss

 !> This is the main type to collect some sections
 !!
 !! The user should use only this type to create objects; all the
 !! other types are used in the components of \c t_section_collection.
 !! After creating a \c t_section_collection object on a set of
 !! processors (with a collective operation), any collective call to a
 !! reduction operation on such an object will not dead-lock; the
 !! details of the MPI reductions for all the sections in the
 !! collection are dealt with in this module.
 !!
 !! Together with the section information, a collection of "pointer"
 !! arrays is provided to access separately and efficiently the
 !! various sections. Notice that a given grid entity can be part of
 !! more than one section, which makes the access syntax slightly
 !! involved.
 !!
 !! \warning Since we use pointers, each \c t_section_collection
 !! object must have the \c target attribute.
 type :: t_section_collection
  type(t_section), allocatable :: sections(:) !< the local sections
  !> elem. to intersected point sections (<tt>grid\%ne</tt>)
  type(t_e2ps), allocatable :: e2ps(:)
  !> vert. to intersected vertex sections (<tt>grid\%nv</tt>)
  type(t_v2vs), allocatable :: v2vs(:)
  !> side to intersected side sections (<tt>grid\%ns</tt>)
  type(t_s2ss), allocatable :: s2ss(:)
  !> elem. to intersected element sections (<tt>grid\%ne</tt>)
  type(t_e2es), allocatable :: e2es(:)
  !> to boundary side sections
  type(t_bs2bss), allocatable :: bs2bss(:)
 end type t_section_collection
 !----------------------------------------------------------------------

 logical, protected ::               &
   mod_sections_initialized = .false.
 character(len=*), parameter :: &
   this_mod_name = 'mod_sections'

 !intrinsic :: norm2 ! this is indeed in the f2008 standard

 interface clear
   module procedure clear_section_collection
 end interface

 interface write_octave
   module procedure write_section, write_section_collection, &
     write_point_section_details, write_side_section_details
 end interface write_octave

!-----------------------------------------------------------------------

contains

!-----------------------------------------------------------------------

 !> Module constructor: check that all the prerequisite modules are
 !! initialized and mark this module as initialized.
 subroutine mod_sections_constructor()
  character(len=*), parameter :: &
    this_sub_name = 'constructor'
  logical :: deps_ok

   !Consistency checks ---------------------------
   ! every module this one builds upon must already be initialized
   deps_ok = mod_messages_initialized   .and. &
             mod_kinds_initialized      .and. &
             mod_fu_manager_initialized .and. &
             mod_mpi_utils_initialized  .and. &
             mod_octave_io_initialized  .and. &
             mod_perms_initialized      .and. &
             mod_grid_initialized
   if(.not.deps_ok) then
     call error(this_sub_name,this_mod_name, &
                'Not all the required modules are initialized.')
   endif
   ! constructing twice is harmless, but worth a warning
   if(mod_sections_initialized) then
     call warning(this_sub_name,this_mod_name, &
                  'Module is already initialized.')
   endif
   !----------------------------------------------

   mod_sections_initialized = .true.
 end subroutine mod_sections_constructor

!-----------------------------------------------------------------------
 
 !> Module destructor: reset the module initialization flag.
 subroutine mod_sections_destructor()
  character(len=*), parameter :: &
    this_sub_name = 'destructor'

   !Consistency checks ---------------------------
   ! destroying a module that was never constructed is an error
   if(.not.mod_sections_initialized) then
     call error(this_sub_name,this_mod_name, &
                'This module is not initialized.')
   endif
   !----------------------------------------------

   mod_sections_initialized = .false.
 end subroutine mod_sections_destructor

!-----------------------------------------------------------------------
 
 !> Define a \c t_section_collection object reading data from an
 !! octave file.
 !!
 !! If the file name is an empty string, an empty section collection
 !! is returned.
 !!
 !! The octave file \c sc_file_name is assumed to contain a cell array
 !! named \c sc_name such that each element of the cell array describes
 !! a section of the section collection \c sc.
 !!
 !! \note It is possible to pass an empty string as \c sc_file, in
 !! which case an empty statistic collection is generated.
 !!
 !! The elements of the cell array are processed one at the time as
 !! follows:
 !! <ul>
 !!  <li> all the processors read the same cell array element,
 !!  corresponding to a given section
 !!  <li> each processor checks whether it is intersected by the
 !!  section
 !!  <li> the processors which are intersected by the section define
 !!  it and store it in a temporary linked list; the remaining
 !!  processors wait to read the next cell array
 !!  <li> finally, each processor stores its sections in \c sc and
 !!  deletes the linked list.
 !! </ul>
 !! This procedure allows for good flexibility while at the same time
 !! reducing the memory requirements.
 !!
 !! \note All the sections, except the boundary region ones, are
 !! defined in terms of point or vertexes Cartesian coordinates, which
 !! are then mapped to grid entities within a tolerance which is also
 !! specified in the input file \c sc_file_name, in a variable called
 !! \c vertex_location_tolerance. This is done because Cartesian
 !! coordinates are independent of the grid partitioning.
 !!
 !! The input file must contain the following variables:
 !! \code{.m}
 !! # tolerance to identify grid vertexes in terms of Cartesian coords.
 !! vertex_location_tolerance = toll
 !!
 !! # section collection: a cell array with name <sc_name> where each
 !! # element is a struct with fields depending on the section type
 !! struct( ... # for point section
 !!   'section_type','point_section', ...
 !!   'section_name','<your section name>', ...
 !!              'x',x ...
 !!       )
 !!
 !! struct( ... # for side section
 !!   'section_type','side_section', ...
 !!   'section_name','<your section name>', ...
 !!       'side_v_x',x ... # three indexes: (coordinate,vertex,side)
 !!       )
 !! \endcode
 !!
 !! \warning After creating a new section collection with this
 !! subroutine, it is necessary to set the number of data and register
 !! and complete the definition of the section collection with \c
 !! validate_section_collection.
 !!
 !! \note The present implementation ensures that the sections are
 !! stored in \c sc in increasing order. This means that blocking
 !! operations can be done as long as they are performed from
 !! <tt>sc\%sections(1)</tt> onwards. However, this has the effect of
 !! serializing the execution.
 subroutine new_section_collection(sc,sc_file_name,sc_name,grid , &
                                   ddc_grid,comm)
  type(t_section_collection), target, intent(out) :: sc
  character(len=*), intent(in) :: sc_file_name, sc_name
  type(t_grid),     intent(in), target   :: grid
  type(t_ddc_grid), intent(in), optional :: ddc_grid
  integer,          intent(in), optional :: comm

  logical :: fu_connected
  logical, allocatable :: keep_points(:)
  integer :: ierr, fu, i, j, jj, k, nsections_in_file, nsections
  integer :: section_intersects
  integer, allocatable :: ie_x(:), idx(:), iv(:), keep_sides(:,:)
  type(t_dperm) :: iv_red, ivs_red
  integer :: mpi_stat(mpi_status_size)
  real(wp) :: tol
  real(wp), allocatable :: x2(:,:), x3(:,:,:)
  character(len=*), parameter :: &
    this_sub_name = 'new_section_collection'
  character(len=1000+len_trim(sc_file_name)+len_trim(sc_name)) :: &
    message(4)
  character(len=100) :: charvar
  ! Temporary linked list used to accumulate the local sections while
  ! their total number is still unknown; copied into sc%sections in
  ! the store_loop below and deallocated.
  type :: t_section_list
   type(t_section) :: s
   type(t_section_list), pointer :: next => null()
  end type t_section_list
  type(t_section_list), pointer :: root => null(), head => null()

   ! Open the input file and read the global data; an empty file name
   ! yields an empty collection (nsections_in_file = 0).
   if(len(trim(sc_file_name)).gt.0) then
     fu_connected = .true.
     call new_file_unit(fu,ierr)
     open(fu,file=trim(sc_file_name), iostat=ierr,                  &
       status='old',action='read',form='formatted',position='rewind')
     if(ierr.ne.0) then
       write(message(1),'(a)') 'Problems opening the section file'
       write(message(2),'(a,a,a)') '  "',trim(sc_file_name),'"'
       call error(this_sub_name,this_mod_name,message(1:2))
     endif
     tol = -1.0_wp
     call read_octave(tol,'vertex_location_tolerance',fu)
     if(tol.le.0.0_wp) then
       write(message(1),'(a)') &
         'Problems reading "vertex_location_tolerance" in file'
       write(message(2),'(a,a,a)') '  "',trim(sc_file_name),'"'
       write(message(3),'(a)') &
         ' Such a variable must be present and strictly positive.'
       call error(this_sub_name,this_mod_name,message(1:3))
     endif
     call locate_var(fu,sc_name,ierr)
     if(ierr.ne.0) then
       write(message(1),'(a)') 'Problems locating section collection'
       write(message(2),'(a,a,a)') '  "',trim(sc_name),'"'
       write(message(3),'(a)') ' in file'
       write(message(4),'(a,a,a)') '  "',trim(sc_file_name),'"'
       call error(this_sub_name,this_mod_name,message(1:4))
     endif
     ! get the number of sections in the collection
     read(fu,'(A)') message(1) ! exclude "# type: cell"
     read(fu,'( A7,I10)') message(1), i ! "# rows:"
     read(fu,'(A10,I10)') message(1), j ! "# columns:"
     nsections_in_file = i*j
   else
     nsections_in_file = 0
     fu_connected = .false.
   endif

   allocate(root)
   ! compiler bug ?? With the following line, ifort works as
   ! expected, otherwise there are problems deallocating root at the
   ! end of the subroutine. This should be investigated further.
       deallocate(root); allocate(root)
   head => root
   nsections = 0

   ! These arrays are allocated now because some fields are updated
   ! during the read_loop loop.
   allocate( sc%e2ps(grid%ne) , sc%v2vs(grid%nv) , sc%s2ss(grid%ns) , &
             sc%e2es(grid%ne) , sc%bs2bss(0) )
   sc%e2ps%np = 0 ; sc%s2ss%ns = 0

   read_loop: do i=1,nsections_in_file
     call read_octave(charvar,'section_type',fu, norewind=.true. )

     section_type_case: select case(charvar)

      case('point_section')
       call read_octave( head%s%section_name,'section_name', &
                         fu, norewind=.true. )
       ! read all the points in the section
       call read_octave_al(x2,'x',fu, norewind=.true. )
       ! the volume is the total number of points
       head%s%svol = real(size(x2,2),wp)
       ! check which points belong to the subdomain
       call locate_point(ie_x,x2,grid, ip=idx )
       ! make sure each point belongs to at most one element
       allocate(keep_points(size(x2,2)))
       keep_points = .false.; keep_points(idx) = .true.
       ! Points on interprocess boundaries can be located by more than
       ! one subdomain: a sequential recv/send pipeline through the
       ! subdomains (in increasing id order) ensures each point is
       ! kept by exactly one subdomain, the one with the lowest id.
       if(present(ddc_grid)) then
         if(ddc_grid%id.gt.0) then
           call mpi_recv( keep_points , size(x2,2) , mpi_logical , &
                        ddc_grid%id-1 , 1 , comm , mpi_stat , ierr )
           do j=1,size(idx)
             if(keep_points(idx(j))) then
               idx(j) = -1 ! point was taken already
             else
               keep_points(idx(j)) = .true.
             endif
           enddo
         endif
         if(ddc_grid%id.lt.ddc_grid%nd-1) then
           ! NOTE(review): standard MPI_SEND has no status argument;
           ! mpi_stat here looks spurious - confirm against the
           ! mod_mpi_utils interface.
           call mpi_send( keep_points , size(x2,2) , mpi_logical , &
                        ddc_grid%id+1 , 1 , comm , mpi_stat , ierr )
         else
           ! the last subdomain of the pipeline has the full picture
           call check_missing_points()
         endif
       else
         call check_missing_points()
       endif
       deallocate(keep_points)
       ! check whether the section intersects the subdomain;
       ! color mpi_undefined excludes this rank from the section
       ! communicator created by mpi_comm_split
       section_intersects = mpi_undefined
       if(count(idx.gt.0).gt.0) section_intersects = 1
       if(present(ddc_grid)) then
         call mpi_comm_split(comm,section_intersects,0,head%s%comm,ierr)
       else
         head%s%comm = mpi_comm_world
       endif
       if(section_intersects.eq.1) then
         nsections = nsections + 1
         allocate(t_point_section::head%s%section_details)
         select type(d=>head%s%section_details);type is(t_point_section)
           d%np = count(idx.gt.0)
           allocate(d%x  (grid%m,d%np))
           allocate(d%xi (grid%d,d%np))
           allocate(d%p2e(       d%np))
           j = 0
           do jj=1,size(idx)
             if(idx(jj).gt.0) then
               j = j+1
               d%p2e (j)   = ie_x(jj)
               d%x  (:,j)  = x2(:,idx(jj))
               ! reference coordinates via the inverse affine map
               d%xi(:,j:j) = iaffmap(grid%e(d%p2e(j)),d%x(:,j:j))
               sc%e2ps(d%p2e(j))%np = sc%e2ps(d%p2e(j))%np+1
             endif
           enddo
         end select
         allocate(head%next); head => head%next
       endif
       deallocate( x2 , ie_x , idx )

      case('side_section')
       call read_octave( head%s%section_name,'section_name', &
                         fu, norewind=.true. )
       ! read all the points in the section
       call read_octave_al(x3,'side_v_x',fu, norewind=.true. )
       ! identify the sides and keep those belonging to the subdomain
       allocate(keep_sides(2,size(x3,3))); keep_sides = -1
       allocate(iv(grid%d))
       side_do: do j=1,size(x3,3)
         do jj=1,grid%d
           iv(jj) = point2vertex(x3(:,jj,j),tol)
           if(iv(jj).lt.0) cycle side_do ! does not belong to sbd.
         enddo
         ! If we are here, we have localized all d+1 vertexes; to
         ! localize the side we have to take into account the possible
         ! permutations.
         iv_red = dperm_reduce(iv)
         ! check among the sides connected to the first vertex
         k = -1
         do jj=1,grid%v(iv(1))%ns
           ivs_red = dperm_reduce(grid%v(iv(1))%s(jj)%p%iv)
           if(iv_red.eq.ivs_red) then
             k = grid%v(iv(1))%is(jj)
             exit
           endif
         enddo
         if(k.lt.0) cycle side_do
         ! If we are here, we have localized the side. The last thing
         ! that must be checked is whether the side is shared by
         ! another processor.
         if(present(ddc_grid)) then
           if(ddc_grid%gs(k)%ni.gt.0) then
             if(ddc_grid%ns( ddc_grid%gs(k)%ni )%id.lt.ddc_grid%id) &
               cycle side_do
           endif
         endif
         ! If we are here, we have found a side belonging to the sbd.
         keep_sides(1,j) = k
         ! ivs_red still holds the reduction of the matched grid side
         keep_sides(2,j) = iv_red%pi%p * ivs_red%pi%p ! parity
       enddo side_do
       call check_missing_sections()
       deallocate( iv , x3 )
       section_intersects = mpi_undefined
       if(count(keep_sides(1,:).gt.0).gt.0) section_intersects = 1
       if(present(ddc_grid)) then
         call mpi_comm_split(comm,section_intersects,0,head%s%comm,ierr)
       else
         head%s%comm = mpi_comm_world
       endif
       if(section_intersects.eq.1) then
         nsections = nsections + 1
         allocate(t_side_section::head%s%section_details)
         select type(d=>head%s%section_details);type is(t_side_section)
           d%ns = count(keep_sides(1,:).gt.0)
           allocate(d%s2s(d%ns))
           allocate(d%p  (d%ns))
           j = 0
           do jj=1,size(keep_sides,2)
             if(keep_sides(1,jj).gt.0) then
               j = j+1
               d%s2s(j) = keep_sides(1,jj)
               d%p  (j) = keep_sides(2,jj)
               sc%s2ss(d%s2s(j))%ns = sc%s2ss(d%s2s(j))%ns+1
             endif
           enddo
           ! the section volume is the total side area, accumulated
           ! over all the processors sharing the section
           call mpi_allreduce( sum(grid%s(d%s2s)%a) , head%s%svol , &
                              1, wp_mpi, mpi_sum, head%s%comm, ierr )
         end select
         allocate(head%next); head => head%next
       endif
       deallocate( keep_sides )

      case default
       write(message(1),'(a)') 'Unknown section type'
       write(message(2),'(a,a,a)') '  "',trim(charvar),'"'
       call error(this_sub_name,this_mod_name,message(1:2))

     end select section_type_case

   enddo read_loop
   if(fu_connected) & ! for the case of missing file
     close(fu,iostat=ierr)

   ! Some allocations can be anticipated now (this ensures that the
   ! fields are allocated, possibly with zero size)
   ! NOTE(review): xi is allocated here with first extent grid%m,
   ! while the section xi arrays use grid%d (see t_point_section
   ! above) - confirm m==d holds or whether this should be grid%d.
   do i=1,grid%ne
     allocate( sc%e2ps(i)%x (grid%m,sc%e2ps(i)%np) , &
               sc%e2ps(i)%xi(grid%m,sc%e2ps(i)%np) , &
               sc%e2ps(i)%s (       sc%e2ps(i)%np) )
   enddo
   do i=1,grid%ns
     allocate( sc%s2ss(i)%p(sc%s2ss(i)%ns) , &
               sc%s2ss(i)%s(sc%s2ss(i)%ns) )
   enddo

   ! We can now copy the section list into the sections allocatable
   ! components, and meanwhile set up the section pointers. The
   ! section list is also deallocated.
   sc%e2ps%np = 0 ! will be used as counter
   sc%s2ss%ns = 0
   allocate(sc%sections(nsections))
   store_loop: do i=1,nsections
     head => root%next
     ! compiler bug: intel has problems with the following assignment:
     ! see http://software.intel.com/en-us/forums/topic/388577
     ! This is now DPD200243378
     !sc%sections(i) = root%s
     call workaround()
     select type(d=>sc%sections(i)%section_details)

      type is(t_point_section)
       do j=1,d%np ! points in the section
         sc%e2ps(d%p2e(j))%np = sc%e2ps(d%p2e(j))%np + 1
         sc%e2ps(d%p2e(j))%x (:,sc%e2ps(d%p2e(j))%np) = d%x (:,j)
         sc%e2ps(d%p2e(j))%xi(:,sc%e2ps(d%p2e(j))%np) = d%xi(:,j)
         sc%e2ps(d%p2e(j))%s (  sc%e2ps(d%p2e(j))%np)%p=>sc%sections(i)
       enddo

      type is(t_side_section)
       do j=1,d%ns ! sides in the section
         sc%s2ss(d%s2s(j))%ns = sc%s2ss(d%s2s(j))%ns + 1
         sc%s2ss(d%s2s(j))%p(sc%s2ss(d%s2s(j))%ns) = d%p(j)
         sc%s2ss(d%s2s(j))%s(sc%s2ss(d%s2s(j))%ns)%p=>sc%sections(i)
       enddo

      class default
       ! If we are here, there must be an inconsistency between the
       ! read_loop and the store_loop. Notice that this is a bug in
       ! the code, not a problem in the input file.
       write(message(1),'(a)') 'This should never happen!'
       write(message(2),'(a)') '  Something wrong in this subroutine.'
       call error(this_sub_name,this_mod_name,message(1:2))

     end select

     deallocate(root); root => head
   enddo store_loop
   ! One list object is always used as workspace
   deallocate(root)

 contains

  !> Abort with a detailed message if some of the section points have
  !! not been located in any partition; in the parallel case this is
  !! called only on the last subdomain of the pipeline, which sees the
  !! complete keep_points array.
  subroutine check_missing_points()
   integer :: nm, k, kk
   character(len=1000), allocatable :: amesg(:)

    nm = count(.not.keep_points) ! points not assigned
    if(nm.gt.0) then
      allocate(amesg(2+nm))
      write(amesg(1),'(a,i6,a)') &
    'The following ',nm,' points have not been located in any partition'
      write(amesg(2),'(a,a,a)') &
    '  section name: "',trim(head%s%section_name),'",'
      k = 0
      do kk=1,size(keep_points)
        if(.not.keep_points(kk)) then
          k = k + 1
          write(amesg(2+k),'(a,i6,a,*(e23.15))') &
            '  point ',kk,'; coords ',x2(:,kk)
        endif
      enddo
      call error(this_sub_name,this_mod_name,amesg)
      deallocate(amesg)
    endif
  end subroutine check_missing_points

  !> Abort with a detailed message if some of the section sides have
  !! not been located in any partition; the local flags are combined
  !! with a logical-or allreduce and the check is then completed on
  !! the master subdomain only.
  subroutine check_missing_sections()
   integer :: nm, k, kk, ierr
   character(len=1000), allocatable :: amesg(:)

    ! keep_points is reused here as scratch: the second half holds the
    ! local flags, the first half receives the global reduction
    allocate(keep_points(2*size(keep_sides,2))) ! used as work array
    keep_points(size(keep_sides,2)+1:) = keep_sides(1,:).gt.0
    if(present(ddc_grid)) then
      call mpi_allreduce(keep_points(size(keep_sides,2)+1:), & ! send
                         keep_points( :size(keep_sides,2) ), & ! recv
                         size(keep_sides,2) , mpi_logical ,  &
                         mpi_lor , comm , ierr)
      ! The rest of the checks can be done on the master subdomain
      if(ddc_grid%id.ne.0) then
        deallocate(keep_points)
        return
      endif
    endif
    ! NOTE(review): when ddc_grid is absent, the first half of
    ! keep_points is tested below without ever being assigned -
    ! confirm whether the local flags should be copied there in the
    ! serial case.
    nm = count(.not.keep_points(:size(keep_sides,2))) ! not assigned
    if(nm.gt.0) then
      allocate(amesg(2+nm))
      write(amesg(1),'(a,i6,a)') &
    'The following ',nm,' sides have not been located in any partition'
      write(amesg(2),'(a,a,a)') &
    '  section name: "',trim(head%s%section_name),'",'
      k = 0
      do kk=1,size(keep_sides,2)
        if(.not.keep_points(kk)) then
          k = k + 1
          write(amesg(2+k),'(a,i6,a,*(e23.15))') &
            '  side ',kk,'; coords ',x3(:,:,kk)
        endif
      enddo
      call error(this_sub_name,this_mod_name,amesg)
      deallocate(amesg)
    endif
    deallocate(keep_points)
  end subroutine check_missing_sections

  !> Return the index of the (first) grid vertex within distance tol
  !! of x, or -1 if there is none; linear search over the local
  !! vertexes.
  pure function point2vertex(x,tol) result(iv)
   real(wp), intent(in) :: x(:), tol
   integer :: iv

   integer :: i

    iv = -1
    do i=1,grid%nv
      !if(norm2(x-grid%v(i)%x).le.tol) then
      ! workaround for FERMI
          if(sqrt(sum((x-grid%v(i)%x)**2)).le.tol) then
        iv = i
        exit
      endif
    enddo
  end function point2vertex

  ! compiler bug: this subroutine should not be required at all: see
  ! http://software.intel.com/en-us/forums/topic/388577
  !sc%sections(i) = root%s
  subroutine workaround()
    ! component-wise copy replacing the commented assignment above
    sc%sections(i)%section_name = root%s%section_name
    sc%sections(i)%svol = root%s%svol
    sc%sections(i)%comm = root%s%comm
    allocate( sc%sections(i)%section_details , &
               source=root%s%section_details )
  end subroutine workaround

 end subroutine new_section_collection
                                                   
!-----------------------------------------------------------------------

 !> This function has to clear the MPI communicators associated with
 !! the various sections. To do this, the intent has to be inout.
 subroutine clear_section_collection(sc)
  type(t_section_collection), intent(inout) :: sc

  integer :: is, ierr

   ! Free the per-section communicators; sections built without
   ! domain decomposition use mpi_comm_world, which must not be freed.
   do is=lbound(sc%sections,1),ubound(sc%sections,1)
     if(sc%sections(is)%comm.ne.mpi_comm_world) &
       call mpi_comm_free(sc%sections(is)%comm,ierr)
   enddo
   ! The remaining components are plain allocatables
   deallocate( sc%sections , sc%e2ps , sc%v2vs , sc%s2ss , sc%e2es , &
               sc%bs2bss )

 end subroutine clear_section_collection

!-----------------------------------------------------------------------
 
 !> Allocate some private fields which depend on the number and size
 !! of registers.
 pure subroutine validate_section_collection(sc)
  type(t_section_collection), intent(inout) :: sc

  integer :: i, ndata, nregs
  ! fixed: this constant used to carry the stale name
  ! 'set_section_collection', which would mislead any error reporting
  character(len=*), parameter :: &
    this_sub_name = 'validate_section_collection'

   do i=1,size(sc%sections)
     ndata = sc%sections(i)%ndata
     nregs = sc%sections(i)%nregs
     ! registers and MPI send buffer: one column per register
     allocate(sc%sections(i)%regs(ndata,nregs))
     allocate(sc%sections(i)%sbuf(ndata,nregs))
     ! one pending request and one MPI status per register
     allocate(sc%sections(i)%req (      nregs))
     allocate(sc%sections(i)%mpi_stat(mpi_status_size,nregs))
     sc%sections(i)%validated = .true.
   enddo
 
 end subroutine validate_section_collection
 
!-----------------------------------------------------------------------
 
 !> Evaluate a reduction operation for all the sections in a section
 !! collection.
 !!
 !! This subroutine is rather flexible, allowing the following
 !! options.
 !! <ul>
 !!  <li> Specify the MPI reduction operator.
 !!  <li> Specify the register to be reduced; it is allowed to use an
 !!  index which is larger than the number of registers, in which case
 !!  no reductions are performed - this can be used to perform a
 !!  reduction only on a subset of the sections contained in \c sc.
 !!  <li> Specify which sections should be reduced: the character
 !!  argument \c sections should contain the following substrings to
 !!  select a section type
 !!  <ul>
 !!   <li> <tt>'p'</tt> \f$\mapsto\f$ point sections
 !!   <li> <tt>'v'</tt> \f$\mapsto\f$ vertex sections
 !!   <li> <tt>'s'</tt> \f$\mapsto\f$ side sections
 !!   <li> <tt>'e'</tt> \f$\mapsto\f$ element sections
 !!   <li> <tt>'b'</tt> \f$\mapsto\f$ boundary side sections.
 !!  </ul>
 !!  Any combination of characters can be specified; if the value is
 !!  not present all the sections are reduced.
 !!  <li> Specify whether the reduction should be started, completed,
 !!  or started and completed, by the optional argument \c red_type:
 !!  <ul>
 !!   <li> <tt>'start'</tt> \f$\mapsto\f$ start communication
 !!   <li> <tt>'wait'</tt> \f$\mapsto\f$ wait an already started
 !!   communication for completion
 !!   <li> <tt>'block'</tt> \f$\mapsto\f$ start and complete the
 !!   communication (this is then a blocking operation).
 !!  </ul>
 !!  The default behaviour is <tt>'block'</tt>.
 !! </ul>
 !!
 !! \todo The nonblocking version requires \c mpi_iallreduce which is
 !! a very recent MPI function and might not be supported by many MPI
 !! implementations yet. The alternative approach is using \c
 !! mpi_allreduce, which would be (much) less efficient, or
 !! implementing a "home made" version of \c mpi_iallreduce using
 !! point-to-point communications, which is again complicated and
 !! likely not very efficient.
 subroutine sync_section_collection(sc,op,reg,sections,red_type)
  type(t_section_collection), target, intent(inout) :: sc
  integer, intent(in) :: op  !< MPI reduction operator
  integer, intent(in) :: reg !< register to be reduced
  !> Sections to be reduced
  character(len=*), intent(in), optional :: sections
  !> Type of MPI communication
  character(len=*), intent(in), optional :: red_type
 
  logical :: sync_what(5), do_what(2)
  integer :: i, ierr
  character(len=*), parameter :: &
    this_sub_name = 'sync_section_collection'

   ! Decode the section selection: one flag per section type, in the
   ! order point, vertex, side, element, boundary side
   if(present(sections)) then
     sync_what(1) = index(sections,'p').gt.0
     sync_what(2) = index(sections,'v').gt.0
     sync_what(3) = index(sections,'s').gt.0
     sync_what(4) = index(sections,'e').gt.0
     sync_what(5) = index(sections,'b').gt.0
   else
     sync_what = .true.
   endif
   ! Decode the communication type: do_what = (/ start , wait /)
   if(present(red_type)) then
     select case(trim(red_type))
      case('start')
       do_what = (/.true.,.false./)
      case('wait')
       do_what = (/.false.,.true./)
      case('block')
       do_what = (/.true. ,.true./)
      case default
       call error(this_sub_name,this_mod_name,                     &
         'Unknown reduction type: red_type="'//trim(red_type)//'".')
     end select
   else
     do_what = .true.
   endif

   ! Consistency check
   if(any(.not.sc%sections%validated)) &
     call error(this_sub_name,this_mod_name, 'The sections must ' // &
       'be validated before one can use this subroutine.')

   ! Start the reduction operations; sections whose register index
   ! exceeds nregs are silently skipped (see the header comments)
   start_if: if(do_what(1)) then
     do i=1,size(sc%sections)
       if(reg.le.sc%sections(i)%nregs) then
         if(section_enabled()) call red_section()
       endif
     enddo
   endif start_if

   ! Wait for completion
   wait_if: if(do_what(2)) then
     ! An alternative implementation would be making a big array with
     ! all the requests to be waited for and perform a single call to
     ! mpi_wait - I have no idea whether there would be any benefit
     ! doing this. Notice that with such an implementation one should
     ! set the req fields in sc%sections to mpi_request_null
     do i=1,size(sc%sections)
       if(reg.le.sc%sections(i)%nregs) then
         if(section_enabled()) &
           call mpi_wait( sc%sections(i)%req(reg) , &
                          sc%sections(i)%mpi_stat(:,reg) , ierr )
       endif
     enddo
   endif wait_if

 contains

  !> True when the dynamic type of the i-th section (host-associated
  !! index) is enabled in sync_what; a section of any other dynamic
  !! type is never enabled, reproducing the behaviour of the former
  !! per-branch select type constructs.
  function section_enabled() result(enabled)
   logical :: enabled
    enabled = .false.
    select type(d=>sc%sections(i)%section_details)
     type is(t_point_section)
      enabled = sync_what(1)
     type is(t_vert_section)
      enabled = sync_what(2)
     type is(t_side_section)
      enabled = sync_what(3)
     type is(t_elem_section)
      enabled = sync_what(4)
     type is(t_bside_section)
      enabled = sync_what(5)
    end select
  end function section_enabled

  !> Reduce register reg of section i: identical for all section types
  subroutine red_section()
    sc%sections(i)%sbuf(:,reg) = sc%sections(i)%regs(:,reg)
    ! Nonblocking version, requires mpi_iallreduce (see the \todo in
    ! the header comments):
    !call mpix_iallreduce(          &
    !  sc%sections(i)%sbuf(:,reg) , & ! send buffer
    !  sc%sections(i)%regs(:,reg) , & ! receive buffer
    !  sc%sections(i)%ndata       , & ! count
    !           wp_mpi            , &
    !             op              , &
    !  sc%sections(i)%comm        , & ! communicator
    !  sc%sections(i)%req(reg)    , & ! request
    !            ierr)
    ! Alternative sequential implementation: use mpi_allreduce and set
    ! the corresponding req to mpi_request_null so that the following
    ! wait call return trivially. Notice that since sc%sections is
    ! accessed in increasing order on all the processors, using the
    ! blocking reduce will not dead-lock.
    call mpi_allreduce(            &
      sc%sections(i)%sbuf(:,reg) , & ! send buffer
      sc%sections(i)%regs(:,reg) , & ! receive buffer
      sc%sections(i)%ndata       , & ! count
               wp_mpi            , &
                 op              , &
      sc%sections(i)%comm        , & ! communicator
                ierr)
    sc%sections(i)%req(reg) = mpi_request_null
  end subroutine red_section
 
 end subroutine sync_section_collection
 
!-----------------------------------------------------------------------
 
 !> Write a section collection to unit fu in Octave text format, as a
 !! struct with the single field "sections".
 subroutine write_section_collection(sc,var_name,fu)
  integer, intent(in) :: fu
  type(t_section_collection), target, intent(in) :: sc
  character(len=*), intent(in) :: var_name
 
  character(len=*), parameter :: &
    this_sub_name = 'write_section_collection'

   ! struct header
   write(fu,'(a,a)') '# name: ',var_name
   call put('# type: struct')
   call put('# length: 1') ! number of fields

   ! field 1 : sections, wrapped in a 1x1 cell
   call put('# name: sections')
   call put('# type: cell')
   call put('# rows: 1')
   call put('# columns: 1')
   call write_octave(sc%sections,'<cell-element>',fu)

 contains

  !> Write one plain header line
  subroutine put(line)
   character(len=*), intent(in) :: line
    write(fu,'(a)') line
  end subroutine put

 end subroutine write_section_collection
 
!-----------------------------------------------------------------------

 !> Write an array of sections to unit fu in Octave text format, as a
 !! struct of seven fields, each field being a 1 x size(s) cell array.
 subroutine write_section(s,var_name,fu)
  integer, intent(in) :: fu
  type(t_section), target, intent(in) :: s(:)
  character(len=*), intent(in) :: var_name
 
  integer :: is
  character(len=*), parameter :: &
    this_sub_name = 'write_section'

   write(fu,'(a,a)')    '# name: ',var_name
   write(fu,'(a)')      '# type: struct'
   write(fu,'(a)')      '# length: 7' ! number of fields

   ! field 1 : ndata
   call cell_field_header('ndata')
   do is=1,size(s)
     call write_octave(s(is)%ndata,'<cell-element>',fu)
   enddo

   ! field 2 : nregs
   call cell_field_header('nregs')
   do is=1,size(s)
     call write_octave(s(is)%nregs,'<cell-element>',fu)
   enddo

   ! field 3 : regs
   call cell_field_header('regs')
   do is=1,size(s)
     call write_octave(s(is)%regs,'<cell-element>',fu)
   enddo

   ! field 4 : section_name
   call cell_field_header('section_name')
   do is=1,size(s)
     call write_octave(s(is)%section_name,'<cell-element>',fu)
   enddo

   ! field 5 : svol
   call cell_field_header('svol')
   do is=1,size(s)
     call write_octave(s(is)%svol,'<cell-element>',fu)
   enddo

   ! field 6 : section_details (polymorphic: dispatch on the dynamic
   ! type; only the point and side variants have a writer so far)
   call cell_field_header('section_details')
   do is=1,size(s)
     select type(d=>s(is)%section_details)
      type is(t_point_section)
       call write_octave(d,'<cell-element>',fu)
      type is(t_side_section)
       call write_octave(d,'<cell-element>',fu)
      class default
       call error(this_sub_name,this_mod_name,'Not yet implemented')
     end select
   enddo

   ! field 7 : comm
   call cell_field_header('comm')
   do is=1,size(s)
     call write_octave(s(is)%comm,'<cell-element>',fu)
   enddo

 contains

  !> Write the four header lines shared by every field: a cell array
  !! of 1 row and size(s) columns with the given field name. The
  !! output is byte-identical to the formerly duplicated write blocks.
  subroutine cell_field_header(field_name)
   character(len=*), intent(in) :: field_name
    write(fu,'(a,a)')    '# name: ',field_name
    write(fu,'(a)')      '# type: cell'
    write(fu,'(a)')      '# rows: 1'
    write(fu,'(a,i7)')   '# columns: ',size(s)
  end subroutine cell_field_header

 end subroutine write_section
 
!-----------------------------------------------------------------------

 !> Write the details of a point section to unit fu in Octave text
 !! format, as a struct of four fields, each wrapped in a 1x1 cell.
 subroutine write_point_section_details(s,var_name,fu)
  integer, intent(in) :: fu
  type(t_point_section), intent(in) :: s
  character(len=*), intent(in) :: var_name
 
  character(len=*), parameter :: &
    this_sub_name = 'write_point_section_details'

   ! struct header
   write(fu,'(a,a)') '# name: ',var_name
   write(fu,'(a)')   '# type: struct'
   write(fu,'(a)')   '# length: 4' ! number of fields

   ! field 1 : np
   call scalar_cell_header('np')
   call write_octave(s%np,'<cell-element>',fu)

   ! field 2 : x
   call scalar_cell_header('x')
   call write_octave(s%x,'<cell-element>',fu)

   ! field 3 : xi
   call scalar_cell_header('xi')
   call write_octave(s%xi,'<cell-element>',fu)

   ! field 4 : p2e
   call scalar_cell_header('p2e')
   call write_octave(s%p2e,'r','<cell-element>',fu)

 contains

  !> Header of a 1x1 cell field with the given name
  subroutine scalar_cell_header(field_name)
   character(len=*), intent(in) :: field_name
    write(fu,'(a,a)') '# name: ',field_name
    write(fu,'(a)')   '# type: cell'
    write(fu,'(a)')   '# rows: 1'
    write(fu,'(a)')   '# columns: 1'
  end subroutine scalar_cell_header

 end subroutine write_point_section_details

!-----------------------------------------------------------------------

 !> Write the details of a side section to unit fu in Octave text
 !! format, as a struct of three fields, each wrapped in a 1x1 cell.
 subroutine write_side_section_details(s,var_name,fu)
  integer, intent(in) :: fu
  type(t_side_section), intent(in) :: s
  character(len=*), intent(in) :: var_name
 
  character(len=*), parameter :: &
    this_sub_name = 'write_side_section_details'

   ! struct header
   write(fu,'(a,a)') '# name: ',var_name
   write(fu,'(a)')   '# type: struct'
   write(fu,'(a)')   '# length: 3' ! number of fields

   ! field 1 : ns
   call scalar_cell_header('ns')
   call write_octave(s%ns,'<cell-element>',fu)

   ! field 2 : s2s
   call scalar_cell_header('s2s')
   call write_octave(s%s2s,'r','<cell-element>',fu)

   ! field 3 : p
   call scalar_cell_header('p')
   call write_octave(s%p,'r','<cell-element>',fu)

 contains

  !> Header of a 1x1 cell field with the given name
  subroutine scalar_cell_header(field_name)
   character(len=*), intent(in) :: field_name
    write(fu,'(a,a)') '# name: ',field_name
    write(fu,'(a)')   '# type: cell'
    write(fu,'(a)')   '# rows: 1'
    write(fu,'(a)')   '# columns: 1'
  end subroutine scalar_cell_header

 end subroutine write_side_section_details

!-----------------------------------------------------------------------

end module mod_sections

